Word cloud for OneTwo.txt
# Word cloud for OneTwo.txt ----
# `df1` is not created in this chunk; it must already hold the text lines in
# its first column.
df1$doc_id <- seq_len(nrow(df1))
colnames(df1)[1] <- "text"

# Each line (row) of the file is treated as a separate document.
mycorpus <- Corpus(DataframeSource(df1))
# Lower-case first: stopwords("english") is all lower case and removeWords()
# matches case-sensitively, so capitalised stop words ("The", "And", ...)
# would otherwise survive and end up in the cloud.
mycorpus <- tm_map(mycorpus, content_transformer(tolower))
mycorpus <- tm_map(mycorpus, removePunctuation)
mycorpus <- tm_map(mycorpus, removeWords, stopwords("english"))

# Term-document matrix -> total frequency of each term over all documents.
tdm <- TermDocumentMatrix(mycorpus)
m <- as.matrix(tdm)
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)

# Take 8 colours from "Dark2" and drop the first two.
pal <- brewer.pal(8, "Dark2")
pal <- pal[-(1:2)]

wordcloud(
  d$word, d$freq,
  scale = c(8, .3), min.freq = 2, max.words = 100,
  random.order = FALSE, rot.per = .15, colors = pal,
  vfont = c("sans serif", "plain")
)
Word cloud for Five.txt
# Word cloud for Five.txt ----
# `df2` is not created in this chunk; it must already hold the text lines in
# its first column.
df2$doc_id <- seq_len(nrow(df2))
colnames(df2)[1] <- "text"

# Each line (row) of the file is treated as a separate document.
mycorpus <- Corpus(DataframeSource(df2))
# Lower-case first: stopwords("english") is all lower case and removeWords()
# matches case-sensitively, so capitalised stop words ("The", "And", ...)
# would otherwise survive and end up in the cloud.
mycorpus <- tm_map(mycorpus, content_transformer(tolower))
mycorpus <- tm_map(mycorpus, removePunctuation)
mycorpus <- tm_map(mycorpus, removeWords, stopwords("english"))

# Term-document matrix -> total frequency of each term over all documents.
tdm <- TermDocumentMatrix(mycorpus)
m <- as.matrix(tdm)
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)

# Take 5 colours from "Dark2" and drop the first two.
pal <- brewer.pal(5, "Dark2")
pal <- pal[-(1:2)]

wordcloud(
  d$word, d$freq,
  scale = c(8, .3), min.freq = 2, max.words = 100,
  random.order = FALSE, rot.per = .15, colors = pal,
  vfont = c("sans serif", "plain")
)
Phrase nets for Five.Txt and OneTwo.Txt with connector words
Phrase net for Five.txt
Phrase net for OneTwo.txt
Word Trees
# Olive data ----
olive <- read.csv("olive.csv", sep = ",", header = TRUE)
# Treat Region as a categorical variable.
olive$Region <- as.factor(olive$Region)
# Wrap the data in a crosstalk SharedData object; the widgets below are all
# built from it.
olive_shared <- SharedData$new(olive)

# Scatter plot of eicosenoic against linoleic. Trace type and mode are stated
# explicitly so plotly does not have to infer them and print its
# "No trace type specified" / "No scatter mode specifed" messages.
eic_lin_scatt <- olive_shared %>%
  plot_ly(x = ~linoleic, y = ~eicosenoic, type = "scatter", mode = "markers")
We found that the unusually low observations of Eicosenoic take the values 1, 2, and 3.
Using persistent brushing, we identified regions 2 and 3 as the regions corresponding to the low values of Eicosenoic. By interacting with the slider we can observe the relationship between Eicosenoic and Linoleic, and their distribution across the regions, for a given range of the Stearic variable. Thus, for different ranges of Stearic, both the joint distribution of Eicosenoic and Linoleic and the distribution over regions change.
The interaction operators used in this exercise were:
* Filtering operators: these reduce the amount of data to be visualized. Using the slider to select a range of Stearic reduces the data shown in the plots.
* Selection operators: these isolate a subset of the data to which further visualization techniques are applied. Holding and dragging the mouse over the low values of Eicosenoic isolates that subset, and persistent brushing then identifies the regions it belongs to.
* Connection operators: these link the plots to each other so that a selection in one plot is reflected in the other. In this exercise the scatter plot of Eicosenoic vs Linoleic and the bar chart of Regions are linked.
# Bar chart of the number of observations per Region, built from the shared
# olive data (`olive_shared`).
bar_chart <- layout(
  add_histogram(plot_ly(olive_shared, x = ~Region)),
  barmode = "overlay"
)
# Link the scatter plot and the bar chart, with a slider on stearic placed in
# a narrow column to the left of the plots.
# The event name is spelled out in full ("plotly_selected") rather than relying
# on partial matching of "plotly_select"; TRUE is used instead of the
# reassignable T.
bscols(
  widths = c(2, NA),
  filter_slider("S", "Stearic", olive_shared, ~stearic),
  subplot(eic_lin_scatt, bar_chart) %>%
    highlight(
      on = "plotly_selected",
      dynamic = TRUE,
      persistent = TRUE,
      opacityDim = I(1)
    ) %>%
    hide_legend()
)
knitr::include_graphics("pers.png")
Persistent Brushing on outliers
Outliers in
# Scatter plot of arachidic against linolenic on the same shared data.
# Trace type and mode are explicit so plotly does not need to infer them.
ara_lin_scatt <- olive_shared %>%
  plot_ly(x = ~linolenic, y = ~arachidic, type = "scatter", mode = "markers")

# Show both scatter plots side by side with persistent brushing.
# "plotly_selected" is written in full instead of the partially matched
# "plotly_select".
subplot(eic_lin_scatt, ara_lin_scatt) %>%
  highlight(
    on = "plotly_selected",
    dynamic = TRUE,
    persistent = TRUE,
    opacityDim = I(1)
  ) %>%
  hide_legend()
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
## Adding more colors to the selection color palette.
## We recommend setting `persistent` to `FALSE` (the default) because persistent selection mode can now be used by holding the shift key (while triggering the `on` event).
## Setting the `off` event (i.e., 'plotly_relayout') to match the `on` event (i.e., 'plotly_selected'). You can change this default via the `highlight()` function.
# Embed the static image qn2_3.png (presumably a screenshot of the brushed
# plots above — confirm).
knitr::include_graphics(path = "qn2_3.png")
Outliers
# Parallel-coordinates plot of the acid columns.
# NOTE(review): the original comment said "column 4:11" but the code selects
# columns 3:10 — confirm which range actually holds the eight acids.
parcord <- ggparcoord(olive, columns = 3:10)
# plotly_data() returns the data associated with a plotly visualization;
# it is grouped by .ID (presumably so each observation is drawn as one line).
p_data <- plotly_data(ggplotly(parcord)) %>% group_by(.ID)

# `shared1` and `region_bar` are not defined in this chunk and must already
# exist when it runs.
par_plot <- plot_ly(shared1, x = ~variable, y = ~value) %>%
  add_lines(line = list(width = 0.3)) %>%
  add_markers(
    marker = list(size = 0.3),
    text = ~.ID, hoverinfo = "text"
  )

# Event names are spelled out in full and TRUE replaces the reassignable T.
bscols(
  par_plot %>%
    highlight(
      on = "plotly_selected",
      dynamic = TRUE,
      persistent = TRUE,
      opacityDim = I(1)
    ) %>%
    hide_legend(),
  region_bar %>%
    highlight(on = "plotly_click", dynamic = TRUE, persistent = TRUE) %>%
    hide_legend()
)
## Adding more colors to the selection color palette.
## We recommend setting `persistent` to `FALSE` (the default) because persistent selection mode can now be used by holding the shift key (while triggering the `on` event).
## Adding more colors to the selection color palette.
## We recommend setting `persistent` to `FALSE` (the default) because persistent selection mode can now be used by holding the shift key (while triggering the `on` event).
## Setting the `off` event (i.e., 'plotly_relayout') to match the `on` event (i.e., 'plotly_selected'). You can change this default via the `highlight()` function.
## Setting the `off` event (i.e., 'plotly_doubleclick') to match the `on` event (i.e., 'plotly_click'). You can change this default via the `highlight()` function.
# Stack the parallel-coordinates plot and the region bar chart in one HTML
# tag list and mark it browsable so it renders as a page.
# "plotly_selected" is written in full (no partial matching) and TRUE replaces
# the reassignable T.
ps <- htmltools::tagList(
  par_plot %>%
    highlight(
      on = "plotly_selected",
      dynamic = TRUE,
      persistent = TRUE,
      opacityDim = I(1)
    ) %>%
    hide_legend(),
  region_bar %>%
    highlight(
      on = "plotly_click",
      dynamic = TRUE,
      persistent = TRUE,
      opacityDim = I(1)
    ) %>%
    hide_legend()
)
htmltools::browsable(ps)
# ignore this
# Alternative layout: both plots in a single two-row subplot.
# "plotly_selected" is written in full and TRUE replaces the reassignable T.
subplot(par_plot, region_bar, nrows = 2) %>%
  highlight(
    on = "plotly_selected",
    dynamic = TRUE,
    persistent = TRUE,
    opacityDim = I(1)
  ) %>%
  hide_legend()
library(ggplot2)
library(plotly)
library(tm)
library(wordcloud)
library(RColorBrewer)
library(crosstalk)
library(GGally)
library(htmltools)
# Read each text file with one line per row (column V1).
# read.table()'s defaults treat ' and " as quote characters and # as the start
# of a comment, which merges or truncates lines of free text; both are
# disabled here. Text is kept as character rather than factor.
df1 <- read.table(
  "OneTwo.txt",
  header = FALSE, sep = "\n",
  quote = "", comment.char = "", stringsAsFactors = FALSE
)
df2 <- read.table(
  "Five.txt",
  header = FALSE, sep = "\n",
  quote = "", comment.char = "", stringsAsFactors = FALSE
)
# Word cloud for OneTwo.txt ----
# Add a document id per row and name the text column as tm expects.
df1$doc_id <- seq_len(nrow(df1))
colnames(df1)[1] <- "text"

# Each line (row) of the file is treated as a separate document.
mycorpus <- Corpus(DataframeSource(df1))
# Lower-case first: stopwords("english") is all lower case and removeWords()
# matches case-sensitively, so capitalised stop words ("The", "And", ...)
# would otherwise survive and end up in the cloud.
mycorpus <- tm_map(mycorpus, content_transformer(tolower))
mycorpus <- tm_map(mycorpus, removePunctuation)
mycorpus <- tm_map(mycorpus, removeWords, stopwords("english"))

# Term-document matrix -> total frequency of each term over all documents.
tdm <- TermDocumentMatrix(mycorpus)
m <- as.matrix(tdm)
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)

# Take 8 colours from "Dark2" and drop the first two.
pal <- brewer.pal(8, "Dark2")
pal <- pal[-(1:2)]

wordcloud(
  d$word, d$freq,
  scale = c(8, .3), min.freq = 2, max.words = 100,
  random.order = FALSE, rot.per = .15, colors = pal,
  vfont = c("sans serif", "plain")
)
# Word cloud for Five.txt ----
# Add a document id per row and name the text column as tm expects.
df2$doc_id <- seq_len(nrow(df2))
colnames(df2)[1] <- "text"

# Each line (row) of the file is treated as a separate document.
mycorpus <- Corpus(DataframeSource(df2))
# Lower-case first: stopwords("english") is all lower case and removeWords()
# matches case-sensitively, so capitalised stop words ("The", "And", ...)
# would otherwise survive and end up in the cloud.
mycorpus <- tm_map(mycorpus, content_transformer(tolower))
mycorpus <- tm_map(mycorpus, removePunctuation)
mycorpus <- tm_map(mycorpus, removeWords, stopwords("english"))

# Term-document matrix -> total frequency of each term over all documents.
tdm <- TermDocumentMatrix(mycorpus)
m <- as.matrix(tdm)
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)

# Take 5 colours from "Dark2" and drop the first two.
pal <- brewer.pal(5, "Dark2")
pal <- pal[-(1:2)]

wordcloud(
  d$word, d$freq,
  scale = c(8, .3), min.freq = 2, max.words = 100,
  random.order = FALSE, rot.per = .15, colors = pal,
  vfont = c("sans serif", "plain")
)
# Olive data ----
olive <- read.csv("olive.csv", sep = ",", header = TRUE)
# Treat Region as a categorical variable.
olive$Region <- as.factor(olive$Region)
# Wrap the data in a crosstalk SharedData object; the widgets below are all
# built from it.
olive_shared <- SharedData$new(olive)

# Scatter plot of eicosenoic against linoleic. Trace type and mode are stated
# explicitly so plotly does not have to infer them and print its
# "No trace type specified" / "No scatter mode specifed" messages.
eic_lin_scatt <- olive_shared %>%
  plot_ly(x = ~linoleic, y = ~eicosenoic, type = "scatter", mode = "markers")
# Bar chart of the number of observations per Region, built from the shared
# olive data (`olive_shared`).
bar_chart <- layout(
  add_histogram(plot_ly(olive_shared, x = ~Region)),
  barmode = "overlay"
)
# Link the scatter plot and the bar chart, with a slider on stearic placed in
# a narrow column to the left of the plots.
# The event name is spelled out in full ("plotly_selected") rather than relying
# on partial matching of "plotly_select"; TRUE is used instead of the
# reassignable T.
bscols(
  widths = c(2, NA),
  filter_slider("S", "Stearic", olive_shared, ~stearic),
  subplot(eic_lin_scatt, bar_chart) %>%
    highlight(
      on = "plotly_selected",
      dynamic = TRUE,
      persistent = TRUE,
      opacityDim = I(1)
    ) %>%
    hide_legend()
)
knitr::include_graphics("pers.png")
# Scatter plot of arachidic against linolenic on the same shared data.
# Trace type and mode are explicit so plotly does not need to infer them.
ara_lin_scatt <- olive_shared %>%
  plot_ly(x = ~linolenic, y = ~arachidic, type = "scatter", mode = "markers")

# Show both scatter plots side by side with persistent brushing.
# "plotly_selected" is written in full instead of the partially matched
# "plotly_select".
subplot(eic_lin_scatt, ara_lin_scatt) %>%
  highlight(
    on = "plotly_selected",
    dynamic = TRUE,
    persistent = TRUE,
    opacityDim = I(1)
  ) %>%
  hide_legend()
knitr::include_graphics("qn2_3.png")
# Parallel-coordinates plot of the acid columns.
# NOTE(review): the original comment said "column 4:11" but the code selects
# columns 3:10 — confirm which range actually holds the eight acids.
parcord <- ggparcoord(olive, columns = 3:10)
# plotly_data() returns the data associated with a plotly visualization;
# it is grouped by .ID (presumably so each observation is drawn as one line).
p_data <- plotly_data(ggplotly(parcord)) %>% group_by(.ID)

# Shared data for crosstalk, keyed by .ID.
# The group must be exactly "olive": the region bar chart's SharedData uses
# group = "olive", and the previous "Olive" (capital O) put this plot in a
# different group, so selections could not propagate between the two.
shared1 <- SharedData$new(p_data, ~.ID, group = "olive")

par_plot <- plot_ly(shared1, x = ~variable, y = ~value) %>%
  add_lines(line = list(width = 0.3)) %>%
  add_markers(
    marker = list(size = 0.3),
    text = ~.ID, hoverinfo = "text"
  )
# Copy of the olive data with an explicit row id to use as the crosstalk key.
olive2 <- olive
# seq_len() instead of 1:nrow(): the latter yields c(1, 0) for an empty frame.
olive2$.ID <- seq_len(nrow(olive))

# Second shared data set, keyed by .ID in crosstalk group "olive".
shared2 <- SharedData$new(olive2, ~.ID, group = "olive")

# Bar chart of the number of observations per Region.
region_bar <- shared2 %>%
  plot_ly(x = ~Region) %>%
  add_histogram() %>%
  layout(barmode = "overlay")
# Parallel-coordinates plot and region bar chart side by side.
# Brushing selects on the former, clicking selects on the latter; event names
# are written in full and TRUE replaces the reassignable T.
bscols(
  par_plot %>%
    highlight(
      on = "plotly_selected",
      dynamic = TRUE,
      persistent = TRUE,
      opacityDim = I(1)
    ) %>%
    hide_legend(),
  region_bar %>%
    highlight(on = "plotly_click", dynamic = TRUE, persistent = TRUE) %>%
    hide_legend()
)
# Stack the parallel-coordinates plot and the region bar chart in one HTML
# tag list and mark it browsable so it renders as a page.
# "plotly_selected" is written in full (no partial matching) and TRUE replaces
# the reassignable T.
ps <- htmltools::tagList(
  par_plot %>%
    highlight(
      on = "plotly_selected",
      dynamic = TRUE,
      persistent = TRUE,
      opacityDim = I(1)
    ) %>%
    hide_legend(),
  region_bar %>%
    highlight(
      on = "plotly_click",
      dynamic = TRUE,
      persistent = TRUE,
      opacityDim = I(1)
    ) %>%
    hide_legend()
)
htmltools::browsable(ps)
# ignore this
# Alternative layout: both plots in a single two-row subplot.
# "plotly_selected" is written in full and TRUE replaces the reassignable T.
subplot(par_plot, region_bar, nrows = 2) %>%
  highlight(
    on = "plotly_selected",
    dynamic = TRUE,
    persistent = TRUE,
    opacityDim = I(1)
  ) %>%
  hide_legend()
# Variable selection ----
# Build the dropdown buttons for one axis ("x", "y" or "z"): one "restyle"
# button per column 4..8 of `olive2`, labelled with the column name, that
# replaces the data of that axis with the column's values.
# A single helper replaces three copy-pasted loops that grew lists in place.
# NOTE(review): columns are picked by position (4:8) — confirm these are the
# intended variables.
make_axis_buttons <- function(axis, cols = 4:8) {
  lapply(cols, function(col) {
    list(
      method = "restyle",
      args = list(axis, list(olive2[[col]])),
      label = colnames(olive2)[col]
    )
  })
}

ButtonsX <- make_axis_buttons("x")
ButtonsY <- make_axis_buttons("y")
ButtonsZ <- make_axis_buttons("z")
# 3D scatter plot of oleic / linolenic / palmitic with one dropdown per axis
# (ButtonsX / ButtonsY / ButtonsZ) to swap the variable shown on that axis.
#
# Axis titles are blanked under `scene`, which is where plotly keeps the axes
# of 3D plots. Previously `zaxis` was nested inside `yaxis` by a misplaced
# parenthesis and used the misspelt key `titles`, and the top-level
# xaxis/yaxis settings do not apply to a 3D scene, so the initial variable
# names stayed on the axes even after another variable was selected.
plot3d <- plot_ly(shared2, x = ~oleic, y = ~linolenic, z = ~palmitic) %>%
  add_markers() %>%
  layout(
    scene = list(
      xaxis = list(title = ""),
      yaxis = list(title = ""),
      zaxis = list(title = "")
    ),
    title = "3d Scatter plot",
    # One dropdown per axis, stacked vertically at the left edge.
    updatemenus = list(
      list(x = 0.07, y = 0.6, buttons = ButtonsX, showactive = TRUE, method = "update"),
      list(x = 0.07, y = 0.7, buttons = ButtonsY, showactive = TRUE, method = "update"),
      list(x = 0.07, y = .8, buttons = ButtonsZ, showactive = TRUE, method = "update")
    ),
    # Text labels placed next to the corresponding dropdowns.
    annotations = list(
      list(text = "X", x = 0, y = 0.6, showarrow = FALSE),
      list(text = "Y", x = 0, y = 0.7, showarrow = FALSE),
      list(text = "Z", x = 0, y = .8, showarrow = FALSE)
    )
  )
plot3d